Year-wise Percentage of Internet Users Globally

python
plotly
Author

Arnab Das

Published

January 4, 2024

Data:

The code below reads the World Bank’s indicator for internet users (% of population), available at World Bank Internet Users Data, and prepares it for visualization with Plotly. After the data is read, missing values are forward-filled along each row, so that a year with no observation takes the most recent earlier value and the time series has no gaps. The cleaned data is then reshaped into long format with pd.melt() to prepare it for plotting with Plotly, and rows with non-numeric values are filtered out to ensure data integrity. The resulting dataset, named melted_data, is ready for visualization and gives insight into internet usage trends across different countries over time.

Load the Python libraries commonly used for data analysis and visualization.

Show the code
import pandas as pd
import plotly.graph_objects as go
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import sklearn
import scipy
import kaleido
from PIL import Image
import plotly.io as pio
import io
from itables import init_notebook_mode
import os
import plotly.express as px
Show the code
# Switch on itables' interactive rendering for DataFrames shown in the notebook.
init_notebook_mode(all_interactive=True)

# Wide-format table read from disk. The melt below relies on the
# 'Country Name' and 'Country Code' columns and treats every other column
# as a year.
data = pd.read_csv("interdata.csv")

# Forward-fill along each row so a missing year takes the most recent
# earlier value (2016 -> 2017 -> 2018). `DataFrame.ffill` is the supported
# spelling; `fillna(method="ffill")` is deprecated.
clean_data = data.ffill(axis=1)

# Reshape to long format (one row per country/year) for Plotly.
melted_data = pd.melt(
    clean_data,
    id_vars=['Country Name', 'Country Code'],
    var_name='Year',
    value_name='Value',
)

# Keep only rows whose value parses as a number. Because the fill runs
# across all columns, a gap with no earlier year can pick up text from the
# preceding non-year column; those rows are discarded here.
# `.copy()` makes the result an independent frame so later column
# assignments do not operate on a slice of the melted table.
is_numeric = pd.to_numeric(melted_data['Value'], errors='coerce').notnull()
melted_data = melted_data[is_numeric].copy()
display(melted_data)
Country Name Country Code Year Value
Loading... (need help?)

Geospatial visualization:

Interactive:

The code below creates an animated choropleth map to visualize the country-wise percentage of internet users globally over time. Animation controls allow users to play through the frames, visualizing changes over time. This interactive visualization offers insights into the evolution of internet usage across different countries globally, providing a dynamic representation of internet usage trends.

Show the code
# Imports needed only by this cell (download and parse the boundary GeoJSON).
import json
from urllib.request import urlopen

# Normalise dtypes for Plotly. `assign` returns a new frame, so this works
# without a SettingWithCopyWarning even when melted_data is a filtered slice.
melted_data = melted_data.assign(**{
    'Value': pd.to_numeric(melted_data['Value']),
    'Year': melted_data['Year'].astype(str),
    'Country Code': melted_data['Country Code'].astype(str),
})

pio.renderers.default = "notebook"

# Country boundaries; `locations` below is matched against the GeoJSON
# features using Plotly's default feature-id key.
with urlopen('https://raw.githubusercontent.com/johan/world.geo.json/master/countries.geo.json') as response:
    countries = json.load(response)

fig = px.choropleth_mapbox(
    melted_data,
    geojson=countries,
    locations="Country Code",
    color='Value',
    color_continuous_scale="Ice",
    range_color=(0, 100),  # fixed scale so colours are comparable across years
    animation_frame="Year",
    labels={'Value': 'Percentage'},
    hover_name="Country Name",
    mapbox_style="carto-positron",  # light basemap
    zoom=1,  # initial zoom level
    opacity=0.5,
    height=500,
    width=800,
)

# Single layout pass: dark page background with white text, and the
# play/pause buttons moved below the map. The former `geo=` settings were
# dropped because they configure geo subplots, which a mapbox figure does
# not have.
fig.update_layout(
    title="Internet Users Globally",
    paper_bgcolor='black',
    font=dict(color='white', size=15),
    hoverlabel=dict(
        bgcolor="#18191A",
        font_size=12,
        font_family="Arial",
    ),
    updatemenus=[dict(type='buttons',
                      showactive=False,
                      y=-0.15,
                      x=-0.1,
                      xanchor='left',
                      yanchor='bottom')
                 ],
)

# Padding of the year slider that the animation adds to the layout.
fig.layout.sliders[0].pad = dict(r=70, t=0.0)

fig.show()

GIF:

This code snippet converts the above interactive plot into a GIF which offers a dynamic portrayal of global internet usage trends over time, emphasizing localized shifts in internet adoption rates.

Show the code
# Render one static choropleth PNG per year, then stitch the frames into an
# animated GIF.
maps = []
for year in melted_data['Year'].unique():
    # Rows for the current year only.
    data_year = melted_data[melted_data['Year'] == year]

    # Same colour scale and 0-100 range for every frame so frames are comparable.
    choropleth_map = go.Choroplethmapbox(
        geojson=countries,
        locations=data_year['Country Code'],
        z=data_year['Value'],
        text=data_year['Country Name'],
        colorscale='Ice',
        zmin=0,
        zmax=100,
        colorbar_title='Value',
        name=str(year),
        marker=dict(opacity=0.5),
    )
    fig = go.Figure(choropleth_map)
    fig.update_layout(
        mapbox_style="carto-positron",
        mapbox=dict(
            center=dict(lat=30, lon=0),  # map centre
            zoom=1,  # initial zoom level
        ),
        margin=dict(l=0, r=0, t=30, b=0),  # leave room only for the title
        title=f"Internet users by year {year}",
        paper_bgcolor='black',  # overall background colour
        font=dict(color="white", size=15),
        height=600,
        width=1000,
    )
    # Static export of this frame as PNG bytes.
    maps.append(fig.to_image(format="png", height=600, width=1000))

# Decode each PNG byte string into a PIL image.
images_pil = [Image.open(io.BytesIO(image_bytes)) for image_bytes in maps]

# Write the GIF: 120 ms per frame, looping forever, each frame cleared before
# the next is drawn. (`quality` was removed: it is not a GIF save option.)
images_pil[0].save("maps_animation.gif",
                   save_all=True,
                   append_images=images_pil[1:],
                   duration=120,
                   loop=0,
                   disposal=2)

maps_animation.gif

Barplot:

This snippet processes country data from a CSV file, adjusts column names, merges it with additional data based on country names, then displays the combined dataset, effectively consolidating continent and country information.

Show the code
# Country metadata; its 'name' column is renamed so it can serve as the
# join key shared with melted_data.
country_info = pd.read_csv("countryinfo.csv").rename(columns={'name': 'Country Name'})

# Attach each country's region with a left join, then discard rows that
# contain any missing value (for example, names with no match in country_info).
region_lookup = country_info[['Country Name', 'region']]
merged_df = melted_data.merge(region_lookup, on='Country Name', how='left').dropna()

display(merged_df)
Country Name Country Code Year Value region
Loading... (need help?)

Interactive:

This code generates a bar plot of the internet usage percentage for each country, with bars colour-coded by continent. The plot is animated to show the yearly changes.

Show the code
# Plotting using Plotly Express
fig = px.bar(merged_df, x='Country Name', y='Value', color='region',
                 title='Value by Country', template = 'plotly_dark',
                 hover_data=['Country Code'], animation_frame="Year")
fig.update_xaxes(showticklabels=False, title_text='Countries')
fig.update_traces(textfont_size=12, textangle=0, textposition="outside", cliponaxis=False)

# Customize layout
fig.update_layout(
    title="Internet users by year",
    yaxis_title="Percentage",
    font=dict(size=14),
    height = 600

)
fig.update_layout(updatemenus=[dict(type='buttons',
                  showactive=False,
                  y=-0.34,
                  x=-0.09,
                  xanchor='left',
                  yanchor='bottom')
])

# Show plot
fig.show()

GIF

A GIF is created from this interactive plot, capturing each frame’s visualization to showcase the dynamic evolution of internet usage across countries over time.

Show the code
# Render one static bar-chart PNG per year, then stitch the frames into an
# animated GIF.
bars = []
for year in merged_df['Year'].unique():
    # Rows for the current year only.
    data_year = merged_df[merged_df['Year'] == year]

    # Bars for the current year, coloured by region.
    bar = px.bar(data_year, x='Country Name', y='Value', color='region',
                 title=f"Internet users by year {year}", template='plotly_dark',
                 hover_data=['Country Code'])
    # Fixed 0-100 y-range keeps every frame on the same scale. Tick labels
    # are hidden, so no tick angle is set.
    bar.update_yaxes(range=[0, 100], title_text='Percentage')
    bar.update_xaxes(showticklabels=False, title_text='Countries')
    bar.update_layout(height=400, font=dict(size=14))
    # Static export of this frame as PNG bytes.
    bars.append(bar.to_image(format="png"))

# Decode each PNG byte string into a PIL image. The list is built here,
# outside the loop, so it always reflects `bars` and never a stale
# `images_pil` left over from an earlier cell.
images_pil = [Image.open(io.BytesIO(image_bytes)) for image_bytes in bars]

# Write the GIF: 100 ms per frame, looping forever.
images_pil[0].save("bars_animation.gif",
                   save_all=True,
                   append_images=images_pil[1:],
                   duration=100,
                   loop=0)

bars_animation.gif